
// STEP 0: PREPARE THE BASELINE SURVEY AND ENDLINE SURVEY 
******************************************************************
* open list of students asssigned as treated 
clear 
import excel "$randomization/final/randomization_TVET_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow

* keep relevant information 
keep menteeid studentid 
tempfile treatlist12 
save `treatlist12', replace

clear 
import excel using "$randomization/randomization_school_TVET_list.xlsx", clear firstrow 
keep schoolname treat_387432 geoprisize
rename treat_387432 treatschool 
tempfile treatschool12
save `treatschool12', replace

* open processed baseline survey 
use "$clean/grade1112_baseline_reachable_wgrade.dta", clear 
merge 1:1 studentid using `treatlist12'
g treatstudent=_merge==3
drop _merge
lab var treatstudent "1=student assigned as treated"

merge m:1 schoolname using `treatschool12', keep(matched) nogen

* keep only grade 12 students with stream information
* or with missing grade information from these schools 
keep if stream~=99 & (grade==12 | inlist(schoolname, "bjishong central school", "samtengang central school", "wangbama central school", "karma academy"))

* keep relevant information for now 
keep name sex b_* schoolname stream studentid email* phone responseid treat* geo*
g bday_b = b_year*10^4 + b_month*10^2 + b_day
tostring bday_b, replace

rename responseid responseid_b

* format name and phone number
rename name name_original_b
g name = name_original_b
lab var name_original_b "name reported at baseline survey"
replace name = trim(itrim(lower(name)))
replace name = subinstr(name," ","",.)  
replace name = subinstr(name,",","",.)   //Removes comma (,)
replace name = subinstr(name,"'","",.)   //Removes apostrophe (')
replace name = subinstr(name,".","",.)   //Removes dot (.) 
replace name = subinstr(name,"/","",.)   //Removes slash (/)
replace name = subinstr(name,"-","",.)   //Removes dash (-)
replace name = subinstr(name,"(","",.)   //Removes opening parentheses
replace name = subinstr(name,")","",.)   //Removes closing parentheses
replace name = subinstr(name,"mynameis","",.)   //Removes irrelevant info
replace name = subinstr(name,"@educationgovbt","",.)   //Removes irrelevant info
replace name = subinstr(name,"@gmailcom","",.)   //Removes irrelevant info
format name %25s

replace phone = strlower(phone) 
replace phone = subinstr(phone,"na","",.)  
rename phone phone_b

* save as new dataset 
cap drop _merge 
save "$temp/g12_p0b.dta", replace 


// STEP 1: MATCHING USING SCHOOL + ID + F/NAME 
******************************************************************
gl criteria_step1 schoolname studentid 

use "$temp/g12_p0b.dta", clear 
foreach i in name sex stream phone {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step1 using "$temp/g12_p0e1.dta"
foreach i in name sex stream phone {
	cap rename `i' `i'_e
}
matchit name_b name_e, g(namescore) 

// perfectly matched
preserve
keep if _merge==3 & (namescore>=0.5 | inlist(studentid, "201.00296.11.0373", "201.00297.11.0187"))
drop _merge 
save "$temp/g12_p1m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | ( _merge==3 & ~(namescore>=0.5 | inlist(studentid, "201.00296.11.0373", "201.00297.11.0187")))
keep $criteria_step1 status_e
merge 1:1 $criteria_step1 using "$temp/g12_p0b.dta", nogen keep(matched)
rename studentid studentid_b
save "$temp/g12_p1b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 |  (_merge==3 & ~(namescore>=0.5 | inlist(studentid, "201.00296.11.0373", "201.00297.11.0187")))
keep $criteria_step1 status_e
merge 1:1 $criteria_step1 using "$temp/g12_p0e1.dta", nogen keep(matched) 
append using "$temp/g12_p0e2.dta"
append using "$temp/g12_p0e3.dta"
rename studentid studentid_e
save "$temp/g12_p1e.dta", replace
count
restore


// STEP 2: MATCHING USING SCHOOL + F/NAME + BIRTHDAY
******************************************************************
gl criteria_step2 schoolname b_day b_month b_year

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g12_p1`dataset'.dta", clear
duplicates tag $criteria_step2, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g12_doublecheck_p1`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g12_p1`dataset'_unique.dta", replace 
}


use "$temp/g12_p1b_unique.dta", clear 
foreach i in studentid name sex stream phone {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step2 using "$temp/g12_p1e_unique.dta"
foreach i in studentid name sex stream phone {
	cap rename `i' `i'_e
}
matchit name_b name_e, g(namescore)

// perfectly matched
preserve
keep if _merge==3 & (namescore>=0.5 | inlist(studentid_b, "201.00221.11.0217", "201.00093.11.0043", "202.00024.11.0007", "202.00003.11.0173"))
drop _merge 
save "$temp/g12_p2m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>=0.5 | inlist(studentid_b, "201.00221.11.0217", "201.00093.11.0043", "202.00024.11.0007", "202.00003.11.0173")))
keep $criteria_step2 status_e 
merge 1:1 $criteria_step2 using "$temp/g12_p1b_unique.dta", nogen keep(matched)
cap rename studentid studentid_b
append using "$temp/g12_doublecheck_p1b.dta"
save "$temp/g12_p2b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>=0.5 | inlist(studentid_b, "201.00221.11.0217", "201.00093.11.0043", "202.00024.11.0007", "202.00003.11.0173")))
keep $criteria_step2 status_e
merge 1:1 $criteria_step2 using "$temp/g12_p1e_unique.dta", nogen keep(matched)
cap rename studentid studentid_e
append using "$temp/g12_doublecheck_p1e.dta"
save "$temp/g12_p2e.dta", replace
count
restore



// STEP 3: MATCHING USING SCHOOL + BIRTHDAY + PHONE 
**********************************************************************
gl criteria_step3 schoolname b_day b_month b_year phone 

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g12_p2`dataset'.dta", clear
rename phone_`dataset' phone
duplicates tag $criteria_step3, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g12_doublecheck_p2`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g12_p2`dataset'_unique.dta", replace 
}

use "$temp/g12_p2b_unique.dta", clear 
foreach i in studentid name sex stream {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step3 using "$temp/g12_p2e_unique.dta" 
foreach i in studentid name sex stream {
	cap rename `i' `i'_e
}

matchit name_original_b name_original_e, g(namescore)

// perfectly matched
preserve
keep if _merge==3 & namescore>=0.6 
drop _merge 
save "$temp/g12_p3m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & namescore<0.6)
keep $criteria_step3 status_e
merge 1:1 $criteria_step3 using "$temp/g12_p2b_unique.dta", nogen keep(matched)
cap rename studentid studentid_b
cap rename name name_b
append using "$temp/g12_doublecheck_p2b.dta"
save "$temp/g12_p3b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & namescore<0.6)
keep $criteria_step3 status_e
merge 1:1 $criteria_step3 using "$temp/g12_p2e_unique.dta", nogen keep(matched)
cap rename studentid studentid_e
cap rename name name_e
append using "$temp/g12_doublecheck_p2e.dta"
save "$temp/g12_p3e.dta", replace
count
restore


// STEP 4: MATCHING USING SCHOOL + SEX + STREAM + PHONE + FUZZYNAME/BIRTHDAY
*******************************************************************
gl criteria_step4 schoolname sex stream phone

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g12_p3`dataset'.dta", clear
duplicates tag $criteria_step4, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g12_doublecheck_p3`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g12_p3`dataset'_unique.dta", replace 
}

use "$temp/g12_p3b_unique.dta", clear 
merge 1:1 $criteria_step4 using "$temp/g12_p3e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 

// perfectly matched 
preserve 
keep if _merge==3 & namescore>0.5
drop _merge 
save "$temp/g12_p4m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & namescore<=0.5)
keep $criteria_step4 status_e
merge 1:1 $criteria_step4 using "$temp/g12_p3b_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_e
cap rename studentid studentid_e
append using "$temp/g12_doublecheck_p3b.dta"
save "$temp/g12_p4b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & namescore<=0.5)
keep $criteria_step4 status_e
merge 1:1 $criteria_step4 using "$temp/g12_p3e_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_b
append using "$temp/g12_doublecheck_p3e.dta"
save "$temp/g12_p4e.dta", replace
count
restore 


// STEP 5: MATCHING STUDENTID + SEX + STREAM + PHONE + FUZZY NAME/BIRTHDAY 
// HAPPENDS IF STUDENTS TRANSFER 
*********************************************************************
gl criteria_step5 studentid sex stream phone

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g12_p4`dataset'.dta", clear
cap rename studentid_`dataset' studentid
rename schoolname schoolname_`dataset'
duplicates tag $criteria_step5, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g12_doublecheck_p4`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g12_p4`dataset'_unique.dta", replace 
}

use "$temp/g12_p4b_unique.dta", clear 
merge 1:1 $criteria_step5 using "$temp/g12_p4e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 

// perfectly matched 
preserve 
keep if _merge==3 & (namescore>0.7 | bdayscore>0.7)
drop _merge 
save "$temp/g12_p5m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step5 status_e
merge 1:1 $criteria_step5 using "$temp/g12_p4b_unique.dta", nogen keep(matched)
drop  b_* 
cap drop *_e
append using "$temp/g12_doublecheck_p4b.dta"
save "$temp/g12_p5b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step5 status_e
merge 1:1 $criteria_step5 using "$temp/g12_p4e_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_b
append using "$temp/g12_doublecheck_p4e.dta"
save "$temp/g12_p5e.dta", replace
count
restore 


// STEP 6: MATCHING STUDENTID + SEX + STREAM + FUZZY NAME/BIRTHDAY/PHONE 
// HAPPENDS IF STUDENTS TRANSFER 
*********************************************************************
gl criteria_step6 studentid sex stream 

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g12_p5`dataset'.dta", clear
cap rename studentid_`dataset' studentid
cap rename phone phone_`dataset'
duplicates tag $criteria_step6, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g12_doublecheck_p5`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g12_p5`dataset'_unique.dta", replace 
}

use "$temp/g12_p5b_unique.dta", clear 
merge 1:1 $criteria_step6 using "$temp/g12_p5e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 
matchit phone_b phone_e, g(phonescore) 

// perfectly matched 
preserve 
keep if _merge==3 & (namescore>0.7 | bdayscore>0.7)
drop _merge 
save "$temp/g12_p6m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step6 status_e
merge 1:1 $criteria_step6 using "$temp/g12_p5b_unique.dta", nogen keep(matched)
drop  b_*
append using "$temp/g12_doublecheck_p5b.dta"
save "$temp/g12_p6b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step6 status_e
merge 1:1 $criteria_step6 using "$temp/g12_p5e_unique.dta", nogen keep(matched)
drop  b_*
append using "$temp/g12_doublecheck_p5e.dta"
save "$temp/g12_p6e.dta", replace
count
restore 


// COMBINING LISTS
******************************************************************
// MATCHED LIST 
clear
forval i=1/6 {
append using "$temp/g12_p`i'm.dta"
cap g dataset = `i'
replace dataset = `i' if missing(dataset)
}
g criteria = ""
replace criteria = "school + studentid + f/name" if dataset==1
replace criteria = "school + birthday + f/name" if dataset==2
replace criteria = "school + birthday + phone" if dataset==3
replace criteria = "school + phone + sex + stream + (fuzzy name | fuzzy birthday)" if dataset==4
replace criteria = "studentid + phone + sex + stream + (fuzzy name | fuzzy birthday)" if dataset==5
replace criteria = "studentid + sex + stream + (fuzzy name | fuzzy birthday)" if dataset==6
replace schoolname_b = schoolname if inlist(dataset, 1, 2, 3, 4)
replace schoolname_e = schoolname if inlist(dataset, 1, 2, 3, 4)
foreach i in studentid {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 1, 5, 6)
}
}
foreach i in phone {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 3, 4, 5)
}
}
foreach i in sex stream {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 4, 5, 6)
}
}

count
drop *score
tab treatstudent
keep responseid* studentid* name* sex* stream* schoolname* phone* criteria treatstudent 
drop sex schoolname stream studentid name phone name_b name_e
save "$temp/grade12_matched_id.dta", replace 


// REQUIRES MANUAL CHECK
use "$temp/g12_p6b.dta", clear
cap drop name_b name
cap drop b_*
foreach i in responseid schoolname name_original bday phone sex stream studentid {
	cap rename `i'_b `i'
}
order responseid schoolname name_original sex stream bday studentid 
lab def stream 1 "arts" 2 "commerce" 3 "science" 4 "rigzhung", modify
lab val stream stream
lab var bday "birthday: YYYY.MM.DD"
save "$temp/grade12_unmatched_baseline.dta", replace

use "$temp/g12_p6e.dta", clear
cap drop name_e name 
cap drop b_*
foreach i in responseid schoolname name_original bday phone sex stream studentid {
	cap rename `i'_e `i'
}
order responseid schoolname name_original sex stream bday studentid 
lab def stream 1 "arts" 2 "commerce" 3 "science" 4 "rigzhung", modify
lab val stream stream
lab var bday "birthday: YYYY.MM.DD"
save "$temp/grade12_unmatched_endline.dta", replace

/*/ EXPORT TO EXCEL FILES TO ASK CHEKU TO MANUALLY MATCH 
foreach i in baseline endline {
use "$temp/grade12_unmatched_`i'.dta", clear
sort schoolname name_original
g shortname = subinstr(schoolname, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)

cap erase "$endline/grade12_`i'_list_manual_match.xlsx"
levelsof shortname, local(schoolist)
foreach school of local schoolist {
	preserve 
	keep if shortname=="`school'"
	drop shortname
	qui export excel using "$endline/grade12_`i'_list_manual_match.xlsx", firstrow(variables) sheet("`school'") sheetreplace cell(A1) 
	restore
}
}
*/

// OPEN MANUAL CHECK LIST BY CHECKU 
use "$temp/grade12_unmatched_baseline.dta", clear
sort schoolname name_original
g shortname = subinstr(schoolname, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)
drop if inlist(shortname, "bjishong", "wangbama", "samtengang", "karma academy")
levelsof shortname, local(schoolist)
foreach school of local schoolist {
	qui import excel using "$raw/manual/match_cheku/grade12_baseline_list_manual_match.xlsx", sheet("`school'") firstrow clear 
	rename _all, lower
	cap rename responseid_endline responseid_e
	keep responseid* schoolname name*
	drop if missing(responseid)
	qui save "$temp/file_grade12_`school'", replace  
}

* append data together
global tempfilelist: dir "$temp/" files "file_*.dta"
foreach tfile of global tempfilelist {
append using "$temp/`tfile'", force
erase "$temp/`tfile'"
}

duplicates drop responseid schoolname name_original, force 

preserve 
keep if missing(responseid_e)
merge 1:1 responseid schoolname using "$temp/grade12_unmatched_baseline.dta", keep(matched) nogen
foreach i in responseid schoolname name_original sex bday studentid phone {
	rename `i' `i'_b
}
save "$temp/grade12_unmatched_baseline_after_manualmatch.dta", replace 
restore

keep if ~missing(responseid_e)

* merge with baseline data 
merge 1:1 responseid schoolname using "$temp/grade12_unmatched_baseline.dta", keep(matched) nogen 
foreach i in responseid schoolname name_original sex bday studentid phone {
	rename `i' `i'_b
}
* merge with endline data
rename responseid_e responseid
merge 1:1 responseid using "$temp/grade12_unmatched_endline.dta", keep(matched) nogen 
foreach i in responseid schoolname name_original sex bday studentid phone {
	cap rename `i' `i'_e
}
g criteria = "manual matching by Cheku's team"
keep responseid_b name_original_b sex_b phone_b treatstudent responseid_e name_original_e sex_e phone_e studentid_b studentid_e schoolname_b schoolname_e criteria status_e

append using "$temp/grade12_matched_id.dta"
save "$temp/grade12_matched_final.dta", replace 





use "$temp/g12_p0b.dta", clear
foreach i in responseid schoolname name_original sex bday studentid phone stream {
	cap rename `i' `i'_b
}
merge 1:1 responseid_b schoolname_b using "$temp/grade12_matched_final.dta"
g followup=1 if _merge==3 
replace followup=0 if _merge==1
drop _merge 
drop status_e 
g note="need to check if grade 12 student" if followup==0 & inlist(schoolname_b, "bjishong central school", "samtengang central school", "wangbama central school", "karma academy")

lab def stream 1 "Arts" 2 "Commerce" 3 "Science" 4 "Rigzhung", modify
lab val stream_b stream

save "$endline/grade12_followup_track.dta", replace 

* erase temporary datasets
forval i=1/5 {
global filelist: dir "$temp/" files "g12_*p`i'*.dta"
foreach tfile of global filelist {
    erase "$temp/`tfile'"
}
}

/*/ Ask Cheku to check for grade information
use "$endline/grade12_followup_track.dta", clear
keep if followup==0 & inlist(schoolname_b, "bjishong central school", "samtengang central school", "wangbama central school", "karma academy")
sort schoolname_b responseid_b
keep responseid_b schoolname_b name_original_b sex_b b_day b_month b_year stream_b studentid_b phone_b note
order responseid_b schoolname_b name_original_b b_day b_month b_year sex_b stream_b studentid_b phone_b note
export excel using "$endline/grade12_followup_track_check_grade.xlsx", replace firstrow("variables")
*/


// Incoporate grade information from Cheku 
clear 
import excel using "$raw/manual/grade12_followup_track_check_grade.xlsx", firstrow
keep responseid_b schoolname_b Grade1112academicyear2021
keep if Grade1112academicyear2021=="12"
drop Grade1112academicyear2021
merge 1:1 responseid_b schoolname_b using "$endline/grade12_followup_track.dta", keep(matched) nogen
tempfile grade12
save `grade12', replace 

use "$endline/grade12_followup_track.dta", clear 
drop if followup==0 & inlist(schoolname_b, "bjishong central school", "samtengang central school", "wangbama central school", "karma academy")
append using `grade12'

// Save as current list 
drop note
sort schoolname_b 

* list of not-yet-surveyed students
preserve 
keep if missing(responseid_e)
tempfile notsurvey
save `notsurvey', replace
restore 

* list of surveyed students - to check if duplicates 
drop if missing(responseid_e)
duplicates tag responseid_e, g(temp)
duplicates drop responseid_e, force 
drop temp 
append using `notsurvey'
save "$endline/grade12_endline_$date.dta", replace 


// CHECK DATA QUALITY 
// COMPUTE FOLLOWUP RATES BY SCHOOL, BY DISTRICT, BY STUDENT STATUS 
use "$endline/grade12_endline_$date.dta", clear 
keep if ~missing(responseid_e)
unique responseid_e
duplicates tag responseid_e, g(temp)
keep responseid_b schoolname_b responseid_e
tempfile matching_id
save `matching_id', replace 

clear
import delimited "$endlinecsv", clear
rename responseid responseid_e
merge 1:1 responseid_e using `matching_id', keep(matched) nogen
keep if status=="Partial"
tempfile partial
save `partial', replace 

use "$endline/grade12_endline_$date.dta", clear
preserve 
keep if missing(responseid_e)
tempfile notsurvey
save `notsurvey', replace
restore

drop if missing(responseid_e)
merge 1:1 responseid_e using `partial'
replace followup=2 if _merge==3
drop _merge 

append using `notsurvey'
rename schoolname_b schoolname 
merge m:1 schoolname using "$clean/list_of_schools_endline.dta", keep(matched) nogen keepusing(district)
rename schoolname schoolname_b

save "$endline/grade12_endline_$date.dta", replace 


use "$endline/grade12_endline_$date.dta", clear 
* total students at baseline 
bys schoolname_b: egen totstudent_b = total(~missing(responseid_b))

* total students can be followed up: 
bys schoolname_b: egen totstudent_e = total(followup==1|followup==2)
* total students can be followed up: complete submission
bys schoolname_b: egen totstudent_ec = total(followup==1)
* total students can be followed up: partial submission
bys schoolname_b: egen totstudent_ep = total(followup==2)

* total treatstudents at baseline
bys schoolname_b: egen totstudent_bt = total(treatstudent==1)
* total nontreatstudents at baseline
bys schoolname_b: egen totstudent_bn = total(treatstudent==0)

* total treatstudents at baseline that can be followed up:
bys schoolname_b: egen totstudent_et = total(treatstudent==1 & (followup==1|followup==2))
* total treatstudents at baseline that can be followed up: complete submission
bys schoolname_b: egen totstudent_ect = total(treatstudent==1 & followup==1)
* total treatstudents at baseline that can be followed up: partial submission
bys schoolname_b: egen totstudent_ept = total(treatstudent==1 & followup==2)

* total nontreatstudents at baseline that can be followed up:
bys schoolname_b: egen totstudent_en = total(treatstudent==0 & (followup==1|followup==2))
* total nontreatstudents at baseline that can be followed up: complete submission 
bys schoolname_b: egen totstudent_ecn = total(treatstudent==0 & followup==1)
* total nontreatstudents at baseline that can be followed up: complete submission 
bys schoolname_b: egen totstudent_epn = total(treatstudent==0 & followup==2)

duplicates drop schoolname_b, force

* follow-up rate by school
g followrate = totstudent_e/totstudent_b*100
lab var followrate "followup rate at school level"
g followrate_c = totstudent_ec/totstudent_b*100
lab var followrate "followup rate at school level: complete submission"
g followrate_p = totstudent_ep/totstudent_b*100
lab var followrate "followup rate at school level: partial submission"

// summary table of overall survey rate by school groups
preserve 
collapse (sum) totstudent_*, by(treatschool)
qui {
summ totstudent_bt if treatschool==1
scalar A = r(mean)
summ totstudent_bn if treatschool==1
scalar B = r(mean)
summ totstudent_bn if treatschool==0
scalar C = r(mean)

summ totstudent_et if treatschool==1
scalar D = r(mean)
summ totstudent_en if treatschool==1
scalar E = r(mean)
summ totstudent_en if treatschool==0
scalar F = r(mean)

summ totstudent_ect if treatschool==1
scalar G = r(mean)
summ totstudent_ecn if treatschool==1
scalar H = r(mean)
summ totstudent_ecn if treatschool==0
scalar I = r(mean)
}

mat overall = (A, B, C / D, E, F / G, H, I / D/A*100, E/B*100, F/C*100 / G/A*100, H/B*100, I/C*100)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if _n==1
replace variable = "total surveyed" if _n==2
replace variable = "total surveyed: complete submission" if _n==3
replace variable = "follow-up rate (%)" if _n==4
replace variable = "follow-up rate (%): complete submission" if _n==5
order variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school  
export excel using "$endline/grade12_endline_tracking_$date.xlsx", firstrow(variables) sheet("overall") sheetreplace cell(B2) 
restore 

// summary table of survey rates for treated students, nontreated students by school strata 
preserve 
collapse (sum) totstudent_*, by(treatschool geoprisize)
qui {
forval i=1/8 {	
summ totstudent_bt if treatschool==1 & geoprisize==`i'
scalar A`i' = r(mean)
summ totstudent_bn if treatschool==1 & geoprisize==`i'
scalar B`i' = r(mean)
summ totstudent_bn if treatschool==0 & geoprisize==`i'
scalar C`i' = r(mean)
summ totstudent_et if treatschool==1 & geoprisize==`i'
scalar D`i' = r(mean)
summ totstudent_en if treatschool==1 & geoprisize==`i'
scalar E`i' = r(mean)
summ totstudent_en if treatschool==0 & geoprisize==`i'
scalar F`i' = r(mean)
mat t`i' = (A`i', B`i', C`i' / D`i', E`i', F`i' / D`i'/A`i'*100, E`i'/B`i'*100, F`i'/C`i'*100)
}
}
mat overall = (t1/t2/t3/t4/t5/t6/t7/t8)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if mod(_n,3)==1
replace variable = "total surveyed" if mod(_n,3)==2
replace variable = "follow-up rate (%)" if mod(_n,3)==0
g geoprisize=.
forval i=1/8 {
	replace geoprisize=`i' if int((_n+2)/3)==`i'
}
order geoprisize variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school 
export excel using "$endline/grade12_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_strata") sheetreplace cell(B2) 
restore 


// summary table of survey rates for treated students, nontreated students by districts 
preserve 
collapse (sum) totstudent_*, by(treatschool district)
fillin district treatschool
sort treatschool district 
g n_district=_n
replace n_district=_n-10 if _n>=11
qui {
forval i=1/10 {	
cap summ totstudent_bt if treatschool==1 & n_district==`i'
scalar A`i' = r(mean)
summ totstudent_bn if treatschool==1 & n_district==`i'
scalar B`i' = r(mean)
summ totstudent_bn if treatschool==0 & n_district==`i'
scalar C`i' = r(mean)
summ totstudent_et if treatschool==1 & n_district==`i'
scalar D`i' = r(mean)
summ totstudent_en if treatschool==1 & n_district==`i'
scalar E`i' = r(mean)
summ totstudent_en if treatschool==0 & n_district==`i'
scalar F`i' = r(mean)
mat t`i' = (A`i', B`i', C`i' / D`i', E`i', F`i' / D`i'/A`i'*100, E`i'/B`i'*100, F`i'/C`i'*100)
}
}
mat overall = (t1/t2/t3/t4/t5/t6/t7/t8/t9/t10)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if mod(_n,3)==1
replace variable = "total surveyed" if mod(_n,3)==2
replace variable = "follow-up rate (%)" if mod(_n,3)==0
g n_district=.
forval i=1/10 {
	replace n_district=`i' if int((_n+2)/3)==`i'
}
g district=""
replace district="Chhukha" if n_district==1
replace district="Dagana" if n_district==2
replace district="Gasa" if n_district==3
replace district="Paro" if n_district==4
replace district="Punakha" if n_district==5
replace district="Thimphu" if n_district==6
replace district="Trashigang" if n_district==7
replace district="Trashiyangtse" if n_district==8
replace district="Tsirang" if n_district==9
replace district="Wangdue Phodrang" if n_district==10
drop n_district
order district variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school 
export excel using "$endline/grade12_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_district") sheetreplace cell(B2) 
restore 

// summary table of survey rates for treated students, nontreated students by each school 
* survey rate for treat students at school level 
g followrate_treated = totstudent_et/totstudent_bt*100
lab var followrate_treated "survey rate for treat students at school level"

g followrate_ntreated = totstudent_en/totstudent_bn*100
lab var followrate_ntreated "survey rate for nontreat students at school level"

keep district schoolname_b followrate* treatschool geoprisize totstudent_*
rename totstudent_b students_at_baseline
rename totstudent_e students_did_endline
rename totstudent_bt treated_students_at_baseline
rename totstudent_bn nontreated_students_at_baseline
rename totstudent_et treated_students_did_endline 
rename totstudent_en nontreated_students_did_endline
drop tot*
drop followrate_c followrate_p 
order district schoolname_b followrate* treatschool geoprisize
sort district schoolname_b 

export excel using "$endline/grade12_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_school") sheetreplace cell(B2) 
	

// LIST FOR FOLLOW-UP SURVEY 
clear
import excel using "$endline/grade12_endline_tracking_$date.xlsx", sheet("by_school") firstrow 
merge 1:m schoolname_b using "$endline/grade12_endline_$date.dta", nogen 
cap erase "$endline/grade12_list_for_followup_$date.xlsx"
keep if followup==0
keep responseid_b name_original_b sex_b schoolname_b b_day b_month b_year studentid_b stream_b email phone_b treatstudent treatschool followrate followrate_treated followrate_ntreated
order responseid_b schoolname_b name_original_b sex_b b_day b_month b_year studentid_b stream_b email phone_b treatstudent treatschool followrate followrate_treated followrate_ntreated
sort followrate followrate_treated followrate_ntreated schoolname_b 
g note = "have not done followup survey/poor quality data"


export excel using "$endline/grade12_target_list_for_followup_$date.xlsx", firstrow(variables) sheet("Summary") sheetreplace cell(A1) 
g shortname = subinstr(schoolname_b, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)

levelsof shortname, local(schoolist)

foreach school of local schoolist {
	preserve 
	keep if shortname=="`school'"
	drop shortname
	qui export excel using "$endline/grade12_target_list_for_followup_$date.xlsx", firstrow(variables) sheet("`school'") sheetreplace cell(A1) 
	restore
}	




